Author

Joe

Published

March 3, 2024

Code
# Record when rendering starts so the total run time can be reported at the end.
start_time <- Sys.time()

suppressPackageStartupMessages(library(ggpath))
suppressPackageStartupMessages(library(plotly))
suppressPackageStartupMessages(library(tidyverse))

# Bias printing towards fixed rather than scientific notation for large counts.
options(scipen = 10L)

# Shared plot theme: bold white text on a solid #5E61AF background.
# An earlier definition of jam_theme was overwritten by this one before it was
# ever used, so only this definition is kept.
jam_theme <- theme_minimal() +
  theme(
    axis.title.y = element_text(margin = margin(t = 0, r = 8, b = 0, l = 0)),
    axis.title.x = element_text(margin = margin(t = 0, r = 0, b = 8, l = 0)),
    text = element_text(size = 20, face = "bold", color = "white"),
    axis.text.x = element_text(size = 10, color = "white"),
    axis.text.y = element_text(size = 13, color = "white"),
    plot.title = element_text(face = "bold", color = "white"),
    plot.background = element_rect(fill = "#5E61AF"),
    plot.margin = margin(1, 1, 1.5, 1.2, "cm")
  )

# Color palette sampled from the lineup image @ https://coolors.co/image-picker
my_cols <- c(
  "#52BFEC", "#AA1880", "#EC0059", "#08BCDF", "#4C1064", "#FF00BC",
  "#2249CD", "#53007D", "#FF6B02", "#B319B2", "#EAE100", "#BF068F"
)

Summary

This notebook shows how I searched the Spotify and last.fm APIs to find data on EDC 2024 artists. I was curious to find the most popular artists.

EDC 2024 Lineup

EDC Lineup 2024

Convert lineup image to text

Create text list of EDC artists using imagetotext.io

Code
# Artist names were extracted from the EDC lineup PNG with
# https://www.imagetotext.io/ and saved as a tab-separated text file.
# Duplicate rows are dropped and the table is sorted by artist name.
edc_artists <- read_tsv(
  "C:/Users/joseph.mcgirr/Personal/R_fun/spotify/edc_2024_artists_text_from_png.txt"
) |>
  unique() |>
  arrange(artist)

# edc_artists

Collect Artist Data

Spotify

Access Spotify API using package spotifyr

You need to set up a developer account with Spotify (at https://developer.spotify.com) to access their Web API. It is quick and easy.

Code
# install.packages("spotifyr")
library(spotifyr)

# Set up a Spotify developer account to get valid API credentials, then:
# Sys.setenv(SPOTIFY_CLIENT_ID = '########################')
# Sys.setenv(SPOTIFY_CLIENT_SECRET = '#########################')

# Request the access token once and pass it to every API call below, instead
# of letting each call request a fresh token through its default argument.
access_token <- get_spotify_access_token()


# Use search_spotify() to find Spotify artist ids from artist names.
# No ID for Domina, Hint of Lavender, Marlie, VUIIIGUR

# Pre-filled with NA: artists without an exact name match stay missing.
spotify_artist_id <- rep(NA_character_, length(edc_artists$artist))

for (i in seq_along(edc_artists$artist)) {

  i_search_spotify <- search_spotify(
    edc_artists$artist[i],
    authorization = access_token
  )

  # Case-insensitive exact match on the artist name. Both sides are upper-cased
  # so the lookup does not depend on the lineup text already being upper case.
  # match() yields the index of the first hit, or NA when there is none
  # (including when the search returned no artist items at all).
  exact_artist_name_match <- match(
    toupper(edc_artists$artist[i]),
    toupper(i_search_spotify$artists$items$name)
  )

  if (!is.na(exact_artist_name_match)) {

    spotify_artist_id[[i]] <-
      i_search_spotify$artists$items$id[exact_artist_name_match]

  } else {

    cat(paste0("\nNo exact match for artist name: ", edc_artists$artist[i], "\n\n"))

    # Alternative: fall back to the first search hit instead of leaving NA.
    # spotify_artist_id[[i]] <- i_search_spotify$artists$items$id[1]
    # cat(paste0("\nNo exact match for artist name: ",edc_artists$artist[i],
    #                    "\n", "Using: ", i_search_spotify$artists$items$name[1],
    #                    "\n", i_search_spotify$artists$items$external_urls.spotify[1], "\n\n"))
  }

}

edc_artists$spotify_artist_id <- spotify_artist_id


# Use get_artist() to get genres, followers, popularity, and an image URL.
#
# Each result vector is pre-filled with a typed NA, so artists without a
# Spotify id (or without genres / images) simply stay missing. Followers and
# popularity are stored as numbers directly rather than being round-tripped
# through character vectors.
genres     <- rep(NA_character_, nrow(edc_artists))
followers  <- rep(NA_real_, nrow(edc_artists))
popularity <- rep(NA_real_, nrow(edc_artists))
image_url  <- rep(NA_character_, nrow(edc_artists))

for (i in seq_along(edc_artists$spotify_artist_id)) {

  # Nothing to look up for artists that had no exact Spotify match.
  if (is.na(edc_artists$spotify_artist_id[i])) {
    next
  }

  # Reuse the token obtained earlier instead of requesting one per call.
  i_artist_info <- get_artist(
    edc_artists$spotify_artist_id[i],
    authorization = access_token
  )

  # Collapse all genres into one comma-separated string; an artist with no
  # genres collapses to "" and is left as NA.
  i_genres <- paste0(i_artist_info$genres, collapse = ",")
  if (nzchar(i_genres)) {
    genres[[i]] <- i_genres
  }

  followers[[i]]  <- i_artist_info$followers$total
  popularity[[i]] <- i_artist_info$popularity

  # Keep the first listed image; NULL means the artist has no images.
  i_image <- i_artist_info$images$url[1]
  if (!is.null(i_image)) {
    image_url[[i]] <- i_image
  }

}

edc_artists$genres     <- genres
edc_artists$followers  <- followers
edc_artists$popularity <- popularity
edc_artists$image_url  <- image_url

last.fm

Access last.fm API using package lastfmR

Code
# devtools::install_github("ppatrzyk/lastfmR")
library(lastfmR)
# attaching lastfmR masks get_tracks()


# Query last.fm for every lineup artist and hold the result as a tibble.
lastfm_artist_info <- tibble(
  get_artist_info(artist_vector = edc_artists$artist)
)

# Full join on the columns the two tables have in common (dplyr's default
# keys), keeping artists that appear in either table.
edc_artists <- edc_artists |>
  full_join(lastfm_artist_info)

# write.table(edc_artists, "C:/Users/joseph.mcgirr/Personal/R_fun/spotify/edc_2024_artist_data.txt", row.names = F, quote = F, sep = "\t")

Spotify Followers

Code
# Load the saved artist table from disk, replacing the in-memory edc_artists.
# NOTE(review): presumably the file produced by the commented-out write.table()
# call in the last.fm section, so the plots render without hitting the APIs.
edc_artists <- read_tsv("C:/Users/joseph.mcgirr/Personal/R_fun/spotify/edc_2024_artist_data.txt")

# Horizontal bar (segment) chart of the highest-ranked artists for one metric.
#
# Arguments:
#   column_name     Name (string) of the column in `data` to rank and plot by.
#   top             Number of artists to keep after sorting in descending order.
#   plot_title      Title shown above the plot.
#   include_images  "true" / "false" (logical TRUE / FALSE is also accepted).
#                   When true, each artist's image (column `image_url`) is
#                   drawn at the plotted value via ggpath::geom_from_path().
#   data            Artist table with an `artist` column; defaults to the
#                   global `edc_artists`, as the original function used.
#
# Returns a ggplot object. Uses the globals `my_cols` and `jam_theme`.
plot.top.artists <- function(column_name, top, plot_title, include_images = "false",
                             data = edc_artists) {

  metric <- sym(column_name)

  # The string flag is kept for existing callers; a logical TRUE used to be
  # silently treated as "no images".
  show_images <- isTRUE(include_images) || identical(include_images, "true")

  i_plot <- data |>
    arrange(desc(!!metric)) |>
    head(top)

  # Recycle the palette to cover every plotted artist, so asking for more
  # artists than there are colours does not fail in the manual scale.
  bar_colors <- rep_len(my_cols, max(nrow(i_plot), length(my_cols)))

  p1 <- ggplot(i_plot, aes(reorder(artist, !!metric), !!metric)) +
    geom_segment(
      aes(
        x = reorder(artist, !!metric),
        xend = reorder(artist, !!metric),
        y = 0,
        yend = !!metric,
        color = artist
      ),
      linewidth = 3
    ) +
    coord_flip(clip = "off") +
    scale_color_manual(values = bar_colors) +
    jam_theme +
    theme(
      axis.title.x = element_blank(),
      axis.title.y = element_blank(),
      legend.position = "none"
    ) +
    ggtitle(plot_title)

  if (show_images) {
    p1 <- p1 + geom_from_path(aes(path = image_url), width = 0.052)
  }

  p1

}

# Top 10 artists ranked by the Spotify `followers` column, with artist images.
plot.top.artists("followers", 10, "Top 10 EDC artists with the most followers on Spotify", include_images = "true")

Code
# Top 30 artists ranked by the Spotify `followers` column, without images.
plot.top.artists("followers", 30, "Top 30 EDC artists with the most followers on Spotify", include_images = "false")

Spotify “Popularity”

Code
# Top 10 artists ranked by the Spotify `popularity` column, with artist images.
plot.top.artists("popularity", 10, "Top 10 most popular EDC artists according to Spotify", include_images = "true")

Code
# Top 30 artists ranked by the Spotify `popularity` column, without images.
plot.top.artists("popularity", 30, "Top 30 most popular EDC artists according to Spotify", include_images = "false")

last.fm Global Listeners

Code
# Top 10 artists ranked by the last.fm `global_listeners` column, with images.
plot.top.artists("global_listeners", 10, "Top 10 artists with the most listeners on last.fm", include_images = "true")

Code
# Top 30 artists ranked by the last.fm `global_listeners` column, without images.
plot.top.artists("global_listeners", 30, "Top 30 artists with the most listeners on last.fm", include_images = "false")

Genres

Code
# filter(edc_artists, !is.na(genres)) |> nrow()
# n_distinct(edc_artists$genres)

# One row per genre mention: split each artist's comma-separated genre string,
# drop missing entries, then trim whitespace and upper-case every label.
all_genres <- tibble(
  genre = toupper(
    str_trim(
      as.character(
        na.omit(unlist(str_split(edc_artists$genres, ",")))
      )
    )
  )
)

# Horizontal bar (segment) chart of the most frequent genres.
#
# Arguments:
#   all_genres   Table with one row per genre mention in a `genre` column.
#   column_name  Name (string) of the column of the summarised table to plot
#                and order by; the summary computed here provides `n_genres`
#                (number of rows per genre).
#   plot_title   Title shown above the plot.
#   top          Number of genres to keep after sorting by count, descending.
#                Defaults to 30, the value that was previously hard-coded.
#
# Returns a ggplot object. Uses the globals `my_cols` and `jam_theme`.
plot.top.genres <- function(all_genres, column_name, plot_title, top = 30) {

  metric <- sym(column_name)

  # count() gives one row per genre with its frequency in `n_genres`.
  i_plot <- all_genres |>
    count(genre, name = "n_genres") |>
    arrange(desc(n_genres)) |>
    head(top)

  # Recycle the palette to exactly cover the plotted genres.
  bar_colors <- rep_len(my_cols, max(nrow(i_plot), length(my_cols)))

  p1 <- ggplot(i_plot, aes(reorder(genre, !!metric), !!metric)) +
    geom_segment(
      aes(
        x = reorder(genre, !!metric),
        xend = reorder(genre, !!metric),
        y = 0,
        yend = !!metric,
        color = genre
      ),
      linewidth = 3
    ) +
    coord_flip(clip = "off") +
    scale_color_manual(values = bar_colors) +
    jam_theme +
    theme(
      axis.title.x = element_blank(),
      axis.title.y = element_blank(),
      legend.position = "none"
    ) +
    ggtitle(plot_title)

  p1

}

# Plot how often each Spotify genre label occurs across the lineup.
plot.top.genres(all_genres, "n_genres", "Top 30 most represented Spotify genres")

Code
# # filter(edc_artists, !is.na(artist_tags)) |> nrow()
# # n_distinct(edc_artists$artist_tags)
# all_genres <- unlist(str_split(edc_artists$artist_tags, ";")) |> 
#   na.omit() |> 
#   as.character() |> 
#   str_trim() |> 
#   toupper() |> 
#   tibble()
# names(all_genres) <- "genre"
# 
# 
# plot.top.genres(all_genres, "n_genres", "Top 30 most represented last.fm artist tags")

Notes

Run time

Code
# Elapsed time since start_time was recorded in the first code chunk.
Sys.time() - start_time
Time difference of 37.04075 secs

Session

Code
# Print the R version, platform, and package versions used for this render.
sessionInfo()
R version 4.3.2 (2023-10-31 ucrt)
Platform: x86_64-w64-mingw32/x64 (64-bit)
Running under: Windows 10 x64 (build 19045)

Matrix products: default


locale:
[1] LC_COLLATE=English_United States.utf8 
[2] LC_CTYPE=English_United States.utf8   
[3] LC_MONETARY=English_United States.utf8
[4] LC_NUMERIC=C                          
[5] LC_TIME=English_United States.utf8    

time zone: America/Los_Angeles
tzcode source: internal

attached base packages:
[1] stats     graphics  grDevices utils     datasets  methods   base     

other attached packages:
 [1] lubridate_1.9.3 forcats_1.0.0   stringr_1.5.1   dplyr_1.1.4    
 [5] purrr_1.0.2     readr_2.1.4     tidyr_1.3.0     tibble_3.2.1   
 [9] tidyverse_2.0.0 plotly_4.10.3   ggplot2_3.4.4   ggpath_1.0.1   

loaded via a namespace (and not attached):
 [1] utf8_1.2.4         generics_0.1.3     stringi_1.8.3      hms_1.1.3         
 [5] digest_0.6.33      magrittr_2.0.3     evaluate_0.23      grid_4.3.2        
 [9] timechange_0.2.0   fastmap_1.1.1      jsonlite_1.8.8     httr_1.4.7        
[13] fansi_1.0.6        viridisLite_0.4.2  scales_1.3.0       lazyeval_0.2.2    
[17] cli_3.6.2          crayon_1.5.2       rlang_1.1.2        bit64_4.0.5       
[21] munsell_0.5.0      withr_2.5.2        cachem_1.0.8       yaml_2.3.8        
[25] parallel_4.3.2     tools_4.3.2        tzdb_0.4.0         memoise_2.0.1     
[29] colorspace_2.1-0   curl_5.2.0         vctrs_0.6.5        R6_2.5.1          
[33] magick_2.8.1       lifecycle_1.0.4    bit_4.0.5          htmlwidgets_1.6.4 
[37] vroom_1.6.5        pkgconfig_2.0.3    pillar_1.9.0       gtable_0.3.4      
[41] Rcpp_1.0.11        data.table_1.14.10 glue_1.6.2         xfun_0.41         
[45] tidyselect_1.2.0   rstudioapi_0.15.0  knitr_1.45         farver_2.1.1      
[49] htmltools_0.5.7    labeling_0.4.3     rmarkdown_2.25     compiler_4.3.2